
* Highest child birth order used later in the script (wide child file and
* the age-at-birth loop).
scalar nb_keep = 7

*** hhheadhukou: agriemp of the household head, for households that have
*** exactly one person recorded as head (hstatus == 1)

use ../data/census1982individual.dta, clear

gen hhh = (hstatus == 1) // indicator for household head
tab hhh

* Number of heads in the household, repeated on every row of the household.
* egen total() is required here: gen ... = sum() is a RUNNING sum, so
* "numhhh == 1" would depend on the arbitrary row order within hhcode and
* would keep one head from households that report several heads.
bys hhcode: egen numhhh = total(hhh)

* keep the head's own row, only in single-head households
keep if numhhh == 1 & hstatus == 1

gen hhheadagriemp = agriemp
keep hhcode hhheadagriemp
count // 2,212,582 was recorded under the earlier running-sum rule; re-check
save ../temp/census1982_hhheadhukou, replace


*** Family structure: per-household head counts by family role and age group

use ../data/census1982individual, clear

* hstatus codes:
*   1 hhhead, 2 spouse, 3 child, 4 grandchild, 5 parent, 6 grandparent,
*   7 other relative, 8 non-relative
tabulate hstatus

* fstatus codes:
*   1 "father" 2 "mother" 3 "child" 4 "grandchild" 5 "grandparent"
*   6 "grandgrandparent" 7 "other relatives" 8 "other non-relatives"
tabulate fstatus

* one indicator per fstatus category: fstatus1, fstatus2, ...
tabulate fstatus, generate(fstatus)

summarize fstatus*

* age-group indicators (young: under 15, old: over 64)
count if missing(age)

* missing age compares as larger than any number, so young is 0 when age is missing
generate young = (age < 15)

* old needs the explicit missing() guard for the same reason
generate old = (age > 64) & !missing(age)

tab1 young old

* collapse to one row per household: role counts, age-group counts, and the
* number of persons with a non-missing hstatus
collapse ///
	(sum) nfather = fstatus1 ///
	      nmother = fstatus2 ///
	      nchild = fstatus3 ///
	      ngrandchild = fstatus4 ///
	      ngrandparent = fstatus5 ///
	      ngrandgrandparent = fstatus6 ///
	      notherrel = fstatus7 ///
	      nothernonrel = fstatus8 ///
	      nyoung = young ///
	      nold = old ///
	(count) nperson = hstatus ///
	, by(hhcode) fast

* residual group: great-grandparents, other relatives, non-relatives
generate nother = ngrandgrandparent + notherrel + nothernonrel

summarize n*

save ../temp/census1982_famstruc, replace


*** parents: one wide row per household (father = P1, mother = P2), plus a
*** separate file with the mother's birth-history variables

use ../data/census1982individual, clear

* attach the household role counts
merge m:1 hhcode using ../temp/census1982_famstruc, nogenerate

* some households have more than one father or more than one mother;
* only households with at most one of each are kept
tabulate nfather nmother
keep if nfather <= 1 & nmother <= 1

* restrict to the parents themselves: father 1, mother 2
keep if inlist(fstatus, 1, 2)

preserve
	* reshape key: "P1" for the father, "P2" for the mother
	generate pcode = cond(fstatus == 1, "P1", "P2")

	local parentvars "age birthy male han ethn agriemp localhukou eduy emp industry occupation unempstatus "
	keep prov hhcode pcode `parentvars'
	reshape wide `parentvars', i(hhcode) j(pcode) string

	save ../temp/census1982_parent, replace
restore

* mother's birth information
keep if fstatus == 2

* BVARS is a global because the children section drops the same variables
global BVARS "chborn chsurv chdece chbornLY"

* suffix with P2 to match the naming of the wide parent file
foreach bvar of varlist $BVARS {
	rename `bvar' `bvar'P2
}

keep hhcode *P2

save ../temp/census1982_motherbirth, replace
	

*** children: one row per child, with birth order and multiple-birth flags

use ../data/census1982individual, clear

keep if fstatus == 3 // children

count if missing(birthy)

* child birth order within household, by birth year.
* "stable" keeps the file order among children with the same birth year, so
* birthorder is reproducible from run to run (a plain sort breaks ties at
* random). Children with missing birthy sort last and therefore receive the
* highest birth orders.
sort hhcode birthy, stable
by hhcode: gen birthorder = _n

tab birthorder,m

* multiple births = children of the same household sharing a birth year.
* Missing birthy is excluded: otherwise all children of a household with an
* unknown birth year form one by-group and would be flagged as twins etc.

* if twin sibling
bys hhcode birthy: gen twin = (_N == 2) & !missing(birthy)

bys hhcode birthy: gen triplet = (_N == 3) & !missing(birthy)

bys hhcode birthy: gen quadruplet = (_N == 4) & !missing(birthy)

* twin code: 1 / 2 within each twin pair, ordered by birthorder
sort hhcode birthy birthorder
by hhcode birthy: gen twcode = _n if twin == 1
label var twcode "Twin Code"

tab twcode,m

* the birth-history variables listed in global BVARS are not kept for children
drop $BVARS

save ../temp/census1982_childlong, replace


*** household level, child aggregate: one row per household describing the
*** number, sex and multiple-birth status of its children

* "clear" (not "replace") is the option that lets -use- discard the data in
* memory; -use ..., replace- is rejected by Stata
use ../temp/census1982_childlong, clear

* number of children
tab male,m
bys hhcode: gen nchild = _N
by hhcode: egen nmale = total(male)

* number of twins (children flagged twin, so a twin pair contributes 2)
by hhcode: egen ntwin = total(twin)
gen maletwin = male*twin
by hhcode: egen nmaletwin = total(maletwin)

* number of triplets
by hhcode: egen ntriplet = total(triplet)

* number of quadruplet
by hhcode: egen nquadruplet = total(quadruplet)

* order of twinning
gen twinorder = birthorder if twin == 1

tab twinorder,m // now still twin's birthorder, later collapse

* collapse to hh level: the household totals are constant within hhcode, so
* (first) just picks them up; twinorder becomes the lowest birth order among
* the household's twins
keep hhcode nchild nmale ntwin nmaletwin ntriplet nquadruplet twinorder age birthy
collapse (first) nchild nmale ntwin nmaletwin ntriplet nquadruplet (min) twinorder ///
(min) minchildage = age (max) maxchildage = age ///
(min) minchildbirthy = birthy (max) maxchildbirthy = birthy ///
, by(hhcode) fast

* remove the labels that collapse attaches
foreach var of varlist _all {
	label var `var' ""
}

tab ntwin

* sex composition, defined only for households with exactly two twin children:
* 1 = both male, 2 = both female, 3 = mixed
g twinsex = 1 if nmaletwin == 2 & ntwin == 2
replace twinsex = 2 if nmaletwin == 0 & ntwin == 2
replace twinsex = 3 if nmaletwin == 1 & ntwin == 2

tab ntriplet // 290 hh with triplets

tab nquadruplet // 1 hh with quadruplets

* household has at least one twin pair
g twinhh = ntwin >= 2

save ../temp/census1982_childcomposition, replace


*** children, wide form: one row per household, child variables suffixed
*** C1, C2, ... by birth order

* "clear" (not "replace") is the option that lets -use- discard the data in
* memory; -use ..., replace- is rejected by Stata
use ../temp/census1982_childlong, clear

* keep only the first nb_keep children; scalar() makes sure the scalar is
* used even if a variable name could be abbreviated to nb_keep
keep if birthorder <= scalar(nb_keep)

* reshape key: "C1", "C2", ... from the birth order
g ccode = "C" + string(birthorder)

local cvars "age birthy male han agriemp localhukou eduy emp "

keep hhcode ccode `cvars'
reshape wide `cvars', i(hhcode) j(ccode) string

save ../temp/census1982_childwide, replace


*** Merge household level data: combine the temporary household files into
*** ../data/census1982hh (one row per household)

* family structure
use ../temp/census1982_famstruc, clear

* parents information
merge 1:1 hhcode using ../temp/census1982_parent
drop _merge

* hukou of household head
merge 1:1 hhcode using ../temp/census1982_hhheadhukou
drop _merge

* mother's fertility
merge 1:1 hhcode using ../temp/census1982_motherbirth
drop _merge

* hh level child aggregate
* nchild exists in both files; -merge- keeps the master (famstruc) copy.
merge 1:1 hhcode using ../temp/census1982_childcomposition

* Households without any child row (_merge == 1) have no children, so all
* child counts are zero for them. nmaletwin and twinhh are zero-filled as
* well, so they are consistent with ntwin instead of being left missing.
* (nchild comes from the master file, so zero changes are expected for it.)
foreach v in nchild nmale ntwin nmaletwin ntriplet nquadruplet twinhh {
	replace `v' = 0 if _merge == 1
}
drop _merge


tab nchild nmale,m

g nfemale = nchild-nmale
tab nfemale,m

* missing when the household has no children (division by zero)
gen maleratio = nmale/nchild
sum maleratio


* all children coreside and none has died: the number of children in the
* household equals the mother's surviving children, which equals her
* children ever born
g sampleChCoreside = nchild == chsurvP2 & chsurvP2 == chbornP2 & !missing(chsurvP2)

tab sampleChCoreside
tab sampleChCoreside if ageP2 >= 15 & ageP2 < 65 // 52.68%

* more children in the household than the mother reports as surviving
g sampleChCoresideMoreThanBirth = ageP2 >= 15 & ageP2 < 65 & (nchild > chsurvP2)

tab sampleChCoresideMoreThanBirth // 2.72%

* child, wide form
merge 1:1 hhcode using ../temp/census1982_childwide
drop _merge

* mother's age at each birth (child birth year minus mother's birth year),
* only where all children coreside
local n = scalar(nb_keep)
forvalues i = 1/`n' {
	g age`i'birthP2 = birthyC`i' - birthyP2 if sampleChCoreside
}


*** additional variables

* both parents Han

g hanP = hanP1 * hanP2

tab hanP,m //

*** samples

* twinning samples: non-twin households with at least i children, or twin
* households whose twins occur at birth order i
forvalues i = 1/5 {
	g sample`i'plus = (twinhh == 0 & nchild >= `i') | (twinhh == 1 & twinorder == `i')
}

order hhcode prov hhhead*

foreach var of varlist _all {
	label var `var' ""
}

compress

save ../data/census1982hh, replace

*** Merge child level data: attach the household file to every child row

use ../temp/census1982_childlong, clear

* households that have no child row (using-only observations) are not kept
merge m:1 hhcode using ../data/census1982hh, keep(master match) nogenerate

* blank out all variable labels
foreach v of varlist _all {
	label variable `v' ""
}

compress

save ../data/census1982child, replace



*** erase temporary files to release space

* every intermediate file written to ../temp by this script
foreach stub in childcomposition childlong childwide famstruc hhheadhukou motherbirth parent {
	erase ../temp/census1982_`stub'.dta
}




	
